{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4eb26fdb-ccf6-465f-b966-73057473d0e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import nltk\n",
    "import re\n",
    "from nltk.corpus import stopwords\n",
    "from sentence_transformers import SentenceTransformer, util"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "1ecd9e2f-e68a-45e7-8187-c2346f3e0127",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Execute the shared helper notebook inside this kernel's namespace (-i), so names it defines are usable here.\n",
    "# NOTE(review): get_most_frequent_words, called further down, is presumably defined there - confirm.\n",
    "%run -i \"../util/lang_utils.ipynb\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8c8e566f-02ab-4793-b2d2-f873e851c5e2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           category                                               text\n",
      "0              tech  tv future in the hands of viewers with home th...\n",
      "1          business  worldcom boss  left books alone  former worldc...\n",
      "2             sport  tigers wary of farrell  gamble  leicester say ...\n",
      "3             sport  yeading face newcastle in fa cup premiership s...\n",
      "4     entertainment  ocean s twelve raids box office ocean s twelve...\n",
      "...             ...                                                ...\n",
      "2220       business  cars pull down us retail figures us retail sal...\n",
      "2221       politics  kilroy unveils immigration policy ex-chatshow ...\n",
      "2222  entertainment  rem announce new glasgow concert us band rem h...\n",
      "2223       politics  how political squabbles snowball it s become c...\n",
      "2224          sport  souness delight at euro progress boss graeme s...\n",
      "\n",
      "[2225 rows x 2 columns]\n"
     ]
    }
   ],
   "source": [
    "# Load the BBC news articles; the printed preview shows two columns, category and text.\n",
    "bbc_df = pd.read_csv(\"../data/bbc-text.csv\")\n",
    "print(bbc_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "80446b8a-437f-4767-a8fd-2a6cfea0133b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pretrained sentence-embedding model and embed every article text.\n",
    "model = SentenceTransformer('all-MiniLM-L6-v2')\n",
    "# Pass a plain list instead of the pandas Series: encode() is documented for str / list of str,\n",
    "# and a list keeps the call independent of the DataFrame's index labels.\n",
    "embeddings = model.encode(bbc_df[\"text\"].tolist(), convert_to_tensor=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "10b36047-68c1-44a5-8f63-54ec1c62a7b3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[117, 168, 192, 493, 516, 530, 638, 827, 883, 1082, 1154, 1208, 1257, 1359, 1553, 1594, 1650, 1898, 1938, 2059, 2152], [76, 178, 290, 337, 497, 518, 755, 923, 1057, 1105, 1151, 1172, 1242, 1560, 1810, 1813, 1882, 1942, 1981], [150, 281, 376, 503, 758, 900, 1156, 1405, 1633, 1636, 1645, 1940, 1946, 1971], [389, 399, 565, 791, 1014, 1018, 1259, 1288, 1440, 1588, 1824, 1917, 2024], [373, 901, 1004, 1037, 1041, 1323, 1499, 1534, 1580, 1621, 1751, 2178], [42, 959, 1063, 1244, 1292, 1304, 1597, 1915, 2081, 2104, 2128], [186, 193, 767, 787, 1171, 1284, 1625, 1651, 1797, 2148], [134, 388, 682, 1069, 1476, 1680, 2106, 2129, 2186, 2198]]\n"
     ]
    }
   ],
   "source": [
    "# Group articles with similar embeddings. Per the sentence-transformers docs, threshold is the\n",
    "# similarity cut-off and min_community_size the smallest cluster that is kept.\n",
    "# The printed result is a list of clusters, each a list of integer row positions.\n",
    "clusters = util.community_detection(embeddings, threshold=0.7, min_community_size=10)\n",
    "print(clusters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "81c7d672-fa2c-49a0-acea-9138298fcc82",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_words_by_cluster(clusters, input_df, text_column=\"text\", top_n=None):\n",
    "    \"\"\"Print the most frequent words found in each cluster of documents.\n",
    "\n",
    "    Args:\n",
    "        clusters: iterable of clusters, each a list of integer row positions into input_df.\n",
    "        input_df: DataFrame holding the documents.\n",
    "        text_column: name of the column that contains the document text.\n",
    "        top_n: if given, print only the first top_n words per cluster; None prints every word.\n",
    "            Assumes the helper returns a list ordered most-frequent first - TODO confirm.\n",
    "\n",
    "    Returns:\n",
    "        None. The words are written to stdout.\n",
    "    \"\"\"\n",
    "    for i, cluster in enumerate(clusters):\n",
    "        print(f\"\\nCluster {i+1}, {len(cluster)} elements \")\n",
    "        # Positional lookup: cluster members are row positions, not index labels.\n",
    "        sentences = input_df.iloc[cluster][text_column]\n",
    "        all_text = \" \".join(sentences)\n",
    "        # get_most_frequent_words is not defined in this notebook; presumably provided by lang_utils - confirm.\n",
    "        freq_words = get_most_frequent_words(all_text)\n",
    "        if top_n is not None:\n",
    "            freq_words = freq_words[:top_n]\n",
    "        print(freq_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "bfd14d41-68b7-4c7e-9aa6-47c513516b06",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cluster 1, 21 elements \n",
      "['mr', 'labour', 'brown', 'said', 'blair', 'election', 'minister', 'prime', 'chancellor', 'would', 'party', 'new', 'campaign', 'told', 'government', 'book', 'gordon', 'next', 'claims', 'tony', 'milburn', 'britain', 'way', 'peston', 'get', 'general', 'economy', 'bbc', 'cabinet', 'people', 'role', 'stand', 'tory', 'back', 'could', 'mps', 'speech', 'also', 'public', 'manifesto', 'voters', 'country', 'speculation', 'leader', 'insisted', 'like', 'allies', 'iraq', 'prescott', 'news', 'think', 'time', 'two', 'conference', 'plans', 'one', 'sunday', 'good', 'meeting', 'every', 'reports', 'pair', 'war', 'rift', 'howard', 'nothing', 'alan', 'ever', 'men', 'may', 'taking', 'make', 'june', 'mutual', 'trust', 'believe', 'parliamentary', 'unified', 'conservative', 'lost', 'services', 'say', 'third', 'term', 'us', 'issues', 'changed', 'following', 'tories', 'record', 'central', 'according', 'robert', 'november', 'mind', 'liberal', 'put', 'key', 'poll', 'monday', 'unity', 'cuts', 'line', 'planning', 'promised', 'take', 'part', 'dismissed', 'deals', 'programme', 'pledge', 'animosity', 'john', 'suspicion', 'manoeuvring', 'says', 'media', 'fight', 'asked', 'set', 'still', 'expected', 'cut', 'economic', 'years', 'claim', 'went', 'months', 'telegraph', 'journalist', 'newspaper', 'dinner', 'interview', 'disaster', 'made', 'poverty', 'trying', 'former', 'mp', 'michael', 'democrat', 'personal', 'need', 'big', 'going', 'political', 'radio', 'know', 'conservatives', 'first', 'home', 'tax', 'centre', 'stability', 'power', 'later', 'message', 'quit', 'six', 'felt', 'deputy', 'intervention', 'see', 'private', 'particularly', 'asian', 'tsunami', 'decision', 'plan', 'accused', 'ambition', 'play', 'added', 'strategy', 'health', 'place', 'job', 'even', 'local', 'questions', 'faced', 'westminster', 'taxes', 'launch', 'vote', 'choice', 'gateshead', 'never', 'hospitals', 'continue', 'clearly', 'saturday', 'jobs', 'intervened', 'ago', 'concentrating', 'actually', 'hosted', 'help', 'year']\n",
      "\n",
      "Cluster 2, 19 elements \n",
      "['yukos', 'us', 'said', 'russian', 'oil', 'gazprom', 'court', 'rosneft', 'russia', 'yugansk', 'company', 'bankruptcy', 'auction', 'firm', 'unit', 'sale', 'khodorkovsky', 'tax', 'government', 'mr', 'baikal', 'yuganskneftegas', 'bank', 'group', 'state', 'assets', 'legal', 'would', 'back', 'protection', 'production', 'sold', 'case', 'bought', 'deutsche', 'gas', 'analysts', 'giant', 'also', 'main', 'putin', 'part', 'bill', 'finance', 'mikhail', 'menatep', 'forced', 'taxes', 'law', 'houston', 'authorities', 'pay', 'last', 'december', 'filed', 'claims', 'political', 'companies', 'jurisdiction', 'action', 'ambitions', 'fraud', 'president', 'year', 'damages', '27bn', 'moscow', 'however', 'claim', 'deal', 'filing', 'control', 'business', 'judge', 'evasion', 'attempt', 'energy', 'could', 'told', 'lawyers', 'claimed', 'key', 'chapter', 'illegal', 'courts', 'founder', 'accounts', 'one', 'chief', 'help', 'shareholders', 'says', 'yuganskneftegaz', 'believe', 'bid', 'former', '20bn', 'kremlin', 'came', 'little-known', 'rights', 'since', 'turn', 'charges', 'china', 'state-owned', 'vladimir', 'made', 'texas', 'ongc', 'firms', 'international', 'clark', 'hearing', 'owner', 'say', 'buyer', 'still', 'order', '9.4bn', 'less', 'jail', 'money', 'india', 'four', 'gazpromneft', 'dispute', 'fines', 'go', 'buy', 'merge', 'large', 'loan', 'industry', 'press', 'shell', 'asset', 'involved', 'sunday', 'thursday', 'market', 'banks', 'expected', 'injunction', 'analyst', 'purchase', 'property', 'monopoly', 'two', 'continue', 'end', 'unknown', 'ruling', 'day', 'sued', 'role', 'unpaid', 'use', 'take', 'whether', 'letitia', 'days', '9.3bn', 'agreed', 'seized', 'arm', 'accused', 'according', 'time', 'value', 'arbitration', 'see', 'prevent', 'punishment', 'currently', 'many', 'output', 'hands', 'spokesman', 'tuesday', 'boss', 'within', 'crude', 'biggest', 'seeking', 'taking', '27.5bn', 'stop', 'rule', 'get', 'previously', 'state-controlled', 'paid', 'chinese', 'cnpc', 'called', 'officials', 
'meanwhile', 'chunk', 'richest']\n",
      "\n",
      "Cluster 3, 14 elements \n",
      "['kenteris', 'greek', 'thanou', 'iaaf', 'said', 'athens', 'tests', 'drugs', 'olympics', 'charges', 'also', 'decision', 'test', 'athletics', 'missing', 'tribunal', 'sprinters', 'pair', 'athletes', 'olympic', 'games', 'tzekos', 'kostas', 'evidence', 'katerina', 'face', 'federation', 'sport', 'motorcycle', 'doping', 'suspended', 'failing', 'hearing', 'chicago', 'appeal', 'committee', 'accident', 'take', 'ruling', 'coach', 'tel', 'aviv', 'hospital', 'charged', 'faking', 'crash', 'court', 'arbitration', 'independent', 'last', 'ioannidis', 'body', 'missed', 'christos', '200m', 'sydney', 'criminal', 'august', 'eve', 'duo', 'cleared', 'withdrew', 'trial', 'former', 'bans', 'greece', 'lawyer', 'international', 'avoiding', 'officials', '100m', 'silver', 'give', 'four', 'segas', 'would', 'set', 'expected', 'december', 'told', 'banned', 'given', 'explanations', 'added', 'two-year', 'opening', 'ceremony', 'days', 'provisional', 'case', 'three', 'could', 'village', 'association', 'gregory', 'provisionally', 'found', 'guilty', 'champion', 'whether', 'final', 'confident', 'sprinter', 'claiming', 'end', 'federations', 'gold', 'anti-doping', 'year', 'one', 'authorities', 'week', 'women', 'statement', 'disciplinary', 'determine', 'right', 'presented', 'new', 'find', 'dropped', 'later', 'start', 'brought', 'chairman', 'february', 'sevastis', 'however', 'overturned', 'bbc', 'time', 'allegedly', 'years', 'submit', 'verdict', 'decide', 'suspensions', 'substances', 'date', 'breaches', 'client', 'early', 'governing', 'unacceptable', 'cases', 'medallist', 'samples', 'prosecutors', 'led', 'insisted', 'means', 'innocent', 'optimistic', 'certain', 'believe', 'taken', 'asked', 'refusing', 'tested', 'denied', 'expect', 'panagopoulos', 'notify', 'whereabouts', 'described', 'action', 'yet', 'world', 'discovered', 'dramatically', 'spent', 'injured', 'way', 'mr', 'shows', 'spokesman', 'hellenic', 'whatever', 'two', 'rules', 'sporting', 'rights', 'proves', 'went', 'call', 'clear', 'light', 
'ban', 'sent', 'compete', 'alleged', 'facing', 'back', 'wada', 'pound', 'suspension', 'controversial', 'failed', 'appear', 'midnight']\n",
      "\n",
      "Cluster 4, 13 elements \n",
      "['mr', 'tax', 'howard', 'labour', 'would', 'said', 'tory', 'election', 'government', 'taxes', 'blair', 'spending', 'tories', 'party', 'cuts', 'people', 'plans', 'services', 'britain', 'public', 'cut', 'michael', 'leader', 'money', 'also', 'chancellor', 'voters', 'brown', 'say', 'waste', '£35bn', 'choice', 'pay', 'budget', 'savings', 'going', 'back', 'liberal', 'says', 'go', 'clear', 'conservatives', 'new', 'proposals', 'lib', 'added', 'time', 'saying', 'conservative', 'give', 'value', 'less', 'one', 'problems', 'democrats', 'campaign', 'kennedy', 'told', 'tony', 'working', 'schools', 'failed', 'manifesto', 'message', 'increase', 'lower', 'economy', 'year', 'council', 'get', 'wrong', 'next', 'left', 'see', 'letwin', 'big', 'could', 'home', 'poll', 'minister', 'immigration', 'families', 'much', 'make', 'hospitals', 'higher', 'let', 'first', 'borrowing', 'frontline', 'finally', 'general', 'black', 'hole', 'claim', 'taxation', 'prime', 'dems', 'two', 'policies', 'yet', 'chance', 'asylum', 'accused', 'tackle', 'direction', 'hold', 'claimed', 'worst', 'without', 'promises', 'key', 'mps', 'shadow', 'jackson', 'billion', 'although', 'pre-election', 'vote', 'police', 'however', 'put', 'faced', 'well', 'day', 'like', 'iraq', 'charles', 'pledged', 'pledge', 'fund', 'rather', 'went', 'majority', 'may', 'everyone', 'country', 'nation', 'spokesman', 'response', 'coming', 'want', 'change', 'record', 'able', 'monday', 'robert', 'decided', 'scale', 'used', 'many', 'even', 'income', 'come', 'taken', 'bureaucracy', 'gordon', 'inheritance', 'economic', 'policy', 'rises', 'add', 'launched', 'hopes', 'hit', 'war', 'part', 'probably', 'parties', 'spend', 'last', 'need', 'raise', 'increases', 'dem', 'hard', 'good', 'dodgy', 'us', 'mass', 'face', 'uk', 'heading', 'way', 'facing', 'decisions', 'kemp', 'character', 'ever', 'ahead', 'turn', 'set', 'grip', 'disorder', 'result', 'today', 'crime', 'months', 'rates', '£4bn']\n",
      "\n",
      "Cluster 5, 12 elements \n",
      "['best', 'film', 'aviator', 'director', 'actor', 'foxx', 'swank', 'actress', 'baby', 'million', 'dollar', 'said', 'win', 'eastwood', 'jamie', 'oscars', 'ray', 'awards', 'scorsese', 'hilary', 'oscar', 'named', 'also', 'star', 'mr', 'prize', 'favourite', 'charles', 'nominated', 'martin', 'year', 'sideways', 'clint', 'staunton', 'supporting', 'award', 'academy', 'hollywood', 'producers', 'imelda', 'dicaprio', 'actors', 'golden', 'tipped', 'performance', 'category', 'second', 'owen', 'uk', 'movie', 'evans', 'nominees', 'drama', 'ceremony', 'blanchett', 'clive', 'screen', 'bookmakers', 'william', 'hill', 'get', 'february', 'night', 'cate', 'winslet', 'chance', 'comedy', 'tv', 'vera', 'drake', 'mann', 'globes', 'among', 'nomination', 'top', 'nominations', 'playing', 'winning', 'kate', 'taylor', 'take', 'us', 'odds', 'bet', 'role', 'two', 'five', 'picture', 'took', 'including', 'critics', 'would', 'globe', 'los', 'angeles', 'leonardo', 'due', 'ampas', 'graham', 'hughes', 'winners', 'time', 'career', 'hackford', 'thank', 'taking', 'triumphed', 'british', 'show', 'gant', 'mike', 'leigh', 'guild', 'meanwhile', 'bond', 'got', 'people', 'producer', 'number', 'picked', 'king', 'years', 'love', 'stars', 'morgan', 'freeman', 'biopic', 'female', 'hepburn', 'bbc', 'think', 'total', 'dams', 'heat', 'one', 'live', 'place', 'closer', 'event', 'next', 'man', 'race', 'list', 'limit', 'three', 'credits', 'motion', 'arts', 'sciences', 'done', 'play', 'name', 'decided', 'saw', 'lead', 'held', 'added', 'beat', 'sunday', 'boxing', 'acting', 'howard', 'many', 'never', 'films', 'annette', 'bening', 'life', 'told', 'first', 'katharine', 'finding', 'neverland', 'series', 'news', 'veteran', 'close', 'french', 'jonathan', 'ross', 'errigo', 'portrayal', 'already', 'good', 'times', 'go', 'sky', 'hopes', 'favourites', 'cast', 'talent', 'half', 'person', 'theatre', 'new', 'ahead', 'world', 'television', 'outsider', 'becoming']\n",
      "\n",
      "Cluster 6, 11 elements \n",
      "['said', 'prices', 'market', 'house', 'uk', 'figures', 'mortgage', 'housing', 'year', 'lending', 'november', 'price', 'december', 'rise', 'rose', 'fell', 'bank', 'halifax', 'last', 'property', 'annual', 'rate', 'month', 'slowdown', 'recent', 'england', 'fall', 'average', 'january', 'sales', 'nationwide', 'since', 'growth', 'new', 'cml', 'interest', 'approvals', 'months', 'quarter', 'number', 'may', 'b', 'showed', 'first', 'period', 'association', 'slowing', 'inflation', 'increase', 'building', 'time', 'would', 'loans', 'overall', 'reported', 'home', 'lenders', 'properties', 'data', 'odpm', 'october', 'value', 'low', 'years', 'seen', 'wimpey', 'however', 'drop', 'biggest', 'bba', 'land', 'registry', 'lowest', 'rates', 'slightly', 'mortgages', 'approved', 'added', 'british', 'survey', 'london', 'monthly', 'fallen', 'west', 'previous', 'made', 'show', 'rises', 'economist', 'strong', 'economists', 'mr', 'level', 'evidence', 'past', 'levels', 'remains', 'despite', 'lender', 'grow', 'indicated', 'council', 'whole', 'remain', 'lower', 'point', 'bankers', 'societies', 'rightmove', 'asking', 'midlands', 'south', 'saw', 'falls', 'second', 'deputy', 'nearly', 'decade', 'wales', 'northern', 'ireland', 'north', 'east', 'continuing', 'regions', 'capital', 'homes', 'society', 'earlier', 'forecast', 'summer', 'could', 'stands', 'decline', 'according', 'buy-to-let', 'continue', 'came', 'surveys', 'week', 'likely', 'demand', 'analysts', 'late', 'sharply', 'end', 'total', 'highest', 'dropped', 'quiet', 'one', 'government', 'less', 'scotland', 'half', 'coming', 'office', 'prime', 'minister', 'suggested', 'still', 'september', 'higher', 'trillion', 'stock', 'private', 'third', 'volume', 'compared', 'bsa', 'gross', 'year-on-year', 'director', 'respectively', 'slight', 'february', 'says', 'means', 'cooled', 'official', 'way', 'bannister', 'followed', 'continued', 'current', 'key', 'thought', 'see', 'small', 'risen', 'tuesday', 'marginally', '£7.1bn', 'major', 'profits', 
'reservations', 'expected', 'continues', 'fundamentals', 'real']\n",
      "\n",
      "Cluster 7, 10 elements \n",
      "['lse', 'deutsche', 'boerse', 'bid', 'euronext', 'said', 'exchange', 'london', 'offer', 'stock', 'would', 'also', 'shareholders', 'german', 'market', 'takeover', 'talks', 'cash', 'may', 'tci', 'could', 'chief', 'meeting', 'executive', 'deal', 'rival', 'however', 'frankfurt', 'shareholder', 'make', 'week', 'seifert', 'biggest', 'move', '£1.3bn', 'reported', 'furse', 'business', 'operator', 'europe', 'clearstream', 'last', 'fund', 'speculation', 'held', 'group', 'shares', 'opposition', 'board', 'management', 'liffe', 'exchanges', 'rejected', 'paris', 'amsterdam', 'lisbon', 'reports', 'clearing', 'buy', 'atticus', 'clara', 'friday', 'werner', 'two', 'since', 'december', 'saying', 'undervalued', 'made', 'bidder', 'create', 'newspaper', 'merger', 'derivatives', 'reuters', 'news', 'control', 'share', 'say', 'acquisition', 'euros', 'sunday', 'possible', 'mr', 'pan-european', 'investors', '£1.5bn', 'bidding', 'raise', '£1.4bn', 'approach', 'proposal', 'interests', 'added', 'stage', 'world', 'new', 'take', 'transaction', 'expressed', 'successful', 'headquarters', 'situation', 'planned', 'monday', 'financial', 'times', 'general', 'company', 'already', 'suggested', 'told', 'owns', 'standard', 'life', 'amid', 'tabled', 'paris-based', 'hold', 'ready', 'jean-francois', 'theodore', 'ms', 'thursday', 'third', 'proposed', 'open', 'whether', 'combines', 'either', 'second', 'york', 'suitor', 'equities', 'unit', 'house', 'processes', 'securities', 'transactions', 'stumbling', 'block', 'lower', 'fees', 'put', 'offered', 'far', 'neither', 'formal', 'plans', 'reportedly', 'jobs', 'agency', 'meanwhile', 'critics', 'commentators', 'firm', 'db', '£1.35bn', '2.5bn', 'high', 'operates', 'brussels', 'analysts', 'prepared', 'wait', 'see', 'london-based', 'return', 'benefits', 'back', 'investment', 'opposed', 'quoted', 'hedge', 'capital', 'rather', 'telegraph', 'wants', 'report', 'despite', 'uk', 'merged', 'working', 'capitalisation', 'well', 'cost', 'war', 'dividend', 'better', 'bourse', 
'supervisory', 'value', 'woo', 'bosses', 'meet', 'raised', '2.9bn', 'month', 'owner', 'due']\n",
      "\n",
      "Cluster 8, 10 elements \n",
      "['dollar', 'us', 'euro', 'said', 'currency', 'deficit', 'analysts', 'trading', 'yen', 'record', 'exports', 'economic', 'trade', 'markets', 'european', 'economy', 'low', 'new', 'mr', 'europe', 'reserves', 'budget', 'foreign', 'slide', 'year', 'market', 'korea', 'thursday', 'one', 'fall', 'concerns', 'strength', 'meeting', 'ministers', 'central', 'bank', 'deficits', 'strong', 'growth', 'south', 'fallen', 'asia', 'investors', 'bush', 'data', 'gmt', 'november', 'comments', 'snow', 'policy', 'weaker', 'boost', 'also', 'currencies', 'however', 'past', 'demand', 'exchange', 'finance', 'banks', 'monday', 'lost', 'japanese', 'day', 'rise', 'tuesday', 'still', 'would', 'holdings', 'consumer', 'japan', 'lows', 'weak', 'administration', 'week', 'less', 'versus', 'figures', 'dropped', 'schroeder', 'italian', 'minister', 'berlusconi', 'global', 'g7', 'decline', 'likely', 'back', 'buying', 'hit', 'economies', 'president', 'pound', 'start', 'months', 'many', 'problems', 'meanwhile', 'saying', 'asian', 'let', 'could', 'china', 'lower', 'current', 'account', 'around', 'friday', 'concern', 'amplified', 'disappointing', 'chicago', 'weakness', 'expected', 'time', 'german', 'voiced', 'absolutely', 'stability', 'imbalances', 'towards', 'nations', 'per', 'sharply', 'traders', 'blamed', 'value', 'officials', 'action', 'eurozone', 'countries', 'all-time', 'earlier', 'oil', 'prices', 'come', 'treasury', 'secretary', 'john', 'change', 'say', 'need', 'see', 'economists', 'says', 'short', 'george', 'w', 'makes', 'goods', 'major', 'may', 'dollars', 'end', 'last', 'recent', 'plans', 'euros', 'level', 'three', 'pointed', 'recently', 'rally', 'despite', 'positive', 'country', 'came', 'strategist', 'month', 'showed', 'ahead', 'pressure', 'helped', 'main', 'gaymard', 'higher', 'imports', 'commerce', 'department', 'part', 'expanding', 'happy', 'set', 'fundamentals', 'world', 'close', 'size', 'predict', 'worry', 'state', 'intervene', 'volatile', 'technical', 'automated', 'light', 'reactions', 
'news', 'adding', 'expect', 'become']\n"
     ]
    }
   ],
   "source": [
    "# Print the frequent words of every detected cluster of bbc_df articles.\n",
    "print_words_by_cluster(clusters, bbc_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c760b999-977b-410a-aa9b-689747a87fd8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
